# library(devtools)
# install_github('MacoskoLab/liger')
library(tidyverse)
[30m── [1mAttaching packages[22m ───────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──[39m
[30m[32m✔[30m [34mggplot2[30m 3.2.1 [32m✔[30m [34mpurrr [30m 0.3.2
[32m✔[30m [34mtibble [30m 2.1.3 [32m✔[30m [34mdplyr [30m 0.8.3
[32m✔[30m [34mtidyr [30m 1.0.0 [32m✔[30m [34mstringr[30m 1.4.0
[32m✔[30m [34mreadr [30m 1.3.1 [32m✔[30m [34mforcats[30m 0.4.0[39m
[30m── [1mConflicts[22m ──────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[30m [34mtidyr[30m::[32mexpand()[30m masks [34mMatrix[30m::expand()
[31m✖[30m [34mdplyr[30m::[32mfilter()[30m masks [34mstats[30m::filter()
[31m✖[30m [34mdplyr[30m::[32mlag()[30m masks [34mstats[30m::lag()
[31m✖[30m [34mtidyr[30m::[32mpack()[30m masks [34mMatrix[30m::pack()
[31m✖[30m [34mtidyr[30m::[32munpack()[30m masks [34mMatrix[30m::unpack()[39m
library(liger)
library(Rtsne)
library(reshape2)
Attaching package: ‘reshape2’
The following object is masked from ‘package:tidyr’:
smiths
library(ggrepel)
Testing LIGER for integration of scATAC and scRNA data. Following vignette from (gitHub)[https://macoskolab.github.io/liger/walkthrough_rna_atac.html].
ATAC data is preprocessed to gene-level features (no. of reads that overlap gene body and promoter for each gene). Data is downloaded from (here)[https://umich.app.box.com/s/5hkhpou4inrulo570yhc38ay8g3ont5b]
rna_clusts = readRDS("~/10X_data/rna_cluster_assignments.RDS")
atac_clusts = readRDS("~/10X_data/atac_cluster_assignments.RDS")
pbmc.atac <- readRDS('~/10X_data/pbmc.atac.expression.mat.RDS')
pbmc.rna <- readRDS('~/10X_data/pbmc.rna.expression.mat.RDS')
List of count matrices as input. createLiger converts to sparseMatrices.
pbmc.data = list(atac=pbmc.atac[,names(atac_clusts)], rna=pbmc.rna[,names(rna_clusts)])
int.pbmc <- createLiger(pbmc.data)
[1] "Removing 97 genes not expressing in atac."
[1] "Removing 11048 genes not expressing in rna."
## Normalize to column total (adj. for sequencing depth)
int.pbmc <- normalize(int.pbmc)
## Select highly variable genes ONLY IN THE RNA DATASET
int.pbmc <- selectGenes(int.pbmc, datasets.use = 2)
## Scale to the SD
int.pbmc <- scaleNotCenter(int.pbmc)
int.pbmc <- optimizeALS(int.pbmc, k=20)
|
| | 0%
|
|= | 1%
|
|== | 2%
|
|==== | 3%
|
|===== | 4%
|
|====== | 5%
|
|======= | 6%
|
|======== | 7%
|
|========= | 8%
|
|=========== | 9%
|
|============ | 10%
|
|============= | 11%
|
|============== | 12%
|
|=============== | 13%
|
|================ | 14%
|
|================== | 15%
|
|=================== | 16%
|
|=====================================================================================================================| 100%
Converged in 3.941199 mins, 16 iterations.
Best results with seed 1.
smp <- sample(1:nrow(int.pbmc@H$atac), size = 1000)
smp_genes <- sample(1:length(int.pbmc@var.genes), size = 100)
int.pbmc@W[,smp_genes] %>% heatmap(Rowv = NA)
Error in int.pbmc@W[, smp_genes] %>% heatmap(Rowv = NA) :
could not find function "%>%"
Dodgy quantile normalization
# saveRDS(int.pbmc, file = "~/10X_data/pbmc.trainedLIGER.RDS")
int.pbmc <- readRDS(int.pbmc, file = "~/10X_data/pbmc.trainedLIGER.RDS")
Error in readRDS(int.pbmc, file = "~/10X_data/pbmc.trainedLIGER.RDS") :
object 'int.pbmc' not found
int.pbmc@scale.data$atac[,
smp_genes] %>%
melt(varnames=c("cell", "gene")) %>%
ggplot(aes(value, color=gene)) + geom_histogram() +
facet_wrap(gene~.)
int.pbmc@scale.data$rna[,
smp_genes] %>%
melt(varnames=c("cell", "gene")) %>%
ggplot(aes(value, color=gene)) + geom_histogram() +
facet_wrap(gene~.)
Even in dataset specific component there is separation of cell types (Meaningful information that is ATAC specific?)
atac.comp <- int.pbmc@H$atac[smp,] %*% int.pbmc@V$atac
Error: object 'smp' not found
rna.comp <- int.pbmc@H$rna[smp,] %*% int.pbmc@V$rna
rna.common.comp <- int.pbmc@H$rna[smp,] %*% int.pbmc@W
tsne.rna.comp <- Rtsne(rna.comp, pca=F, check_duplicates = F, theta=0.5, perplexity=30)
rownames(tsne.rna.comp$Y) <- rownames(rna.comp)
tsne.rna.common.comp <- Rtsne(rna.common.comp, pca=F, check_duplicates = F, theta=0.5, perplexity=30)
rownames(tsne.rna.common.comp$Y) <- rownames(rna.common.comp)
tsne.rna.comp$Y %>%
as.tibble(rownames="cell") %>%
mutate(clust=rna_clusts[cell]) %>%
ggplot(aes(V1, V2, color=clust)) +
geom_point()
tsne.rna.common.comp$Y %>%
as.tibble(rownames="cell") %>%
mutate(clust=rna_clusts[cell]) %>%
ggplot(aes(V1, V2, color=clust)) +
geom_point()